% Silver et al. -- deterministic policy gradient algorithms (ICML, 2014).
@inproceedings{silver2014deterministic,
  author    = {Silver, David and Lever, Guy and Heess, Nicolas and Degris, Thomas and Wierstra, Daan and Riedmiller, Martin},
  title     = {Deterministic Policy Gradient Algorithms},
  booktitle = {International Conference on Machine Learning (ICML)},
  year      = {2014},
  pages     = {387--395},
}


% Silver et al. -- Go-playing system combining deep neural networks with tree search (Nature, 2016).
% Only the first ten authors are listed here; "and others" marks the list as
% truncated so that styles render "et al." instead of implying it is complete.
@article{silver2016mastering,
  author  = {Silver, David and Huang, Aja and Maddison, Chris J. and Guez, Arthur and Sifre, Laurent and van den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and others},
  title   = {Mastering the game of {Go} with deep neural networks and tree search},
  journal = {Nature},
  volume  = {529},
  number  = {7587},
  pages   = {484--489},
  year    = {2016},
  doi     = {10.1038/nature16961},
}


% Sutton et al. -- policy gradient methods with function approximation (NIPS, 2000).
@inproceedings{sutton2000policy,
  author    = {Sutton, Richard S and McAllester, David A and Singh, Satinder P and Mansour, Yishay},
  title     = {Policy gradient methods for reinforcement learning with function approximation},
  booktitle = {Advances in Neural Information Processing Systems (NIPS)},
  year      = {2000},
  pages     = {1057--1063},
}


% Sutton, Szepesvari, Maei -- convergent linear-time off-policy TD learning (NIPS 21, 2008).
% Typed as a proceedings paper (booktitle, not journal) to match the other NIPS
% entry in this file. The O(n) in the title is brace-protected math so that
% sentence-casing styles cannot lowercase it.
@inproceedings{sutton2008convergent,
  author    = {Sutton, Richard S and Szepesv{\'a}ri, Csaba and Maei, Hamid Reza},
  title     = {A convergent {$O(n)$} algorithm for off-policy temporal-difference learning with linear function approximation},
  booktitle = {Advances in Neural Information Processing Systems (NIPS)},
  volume    = {21},
  pages     = {1609--1616},
  year      = {2008},
}


% Sutton et al. -- fast gradient-descent TD methods with linear function approximation (ICML, 2009).
@inproceedings{sutton2009fast,
  author    = {Sutton, Richard S and Maei, Hamid Reza and Precup, Doina and Bhatnagar, Shalabh and Silver, David and Szepesv{\'a}ri, Csaba and Wiewiora, Eric},
  title     = {Fast gradient-descent methods for temporal-difference learning with linear function approximation},
  booktitle = {International Conference on Machine Learning (ICML)},
  year      = {2009},
  pages     = {993--1000},
}